*****************************************************
* 		 3. Wage bin employment, 1997q1-2019q4 		*
*		Brochu, Green, Lemieux, and Townsend		*
* 					RA: Ratzanyel Rincon 			*
*****************************************************

/*
Comments by James
1) Opted to use the Cengiz et al. convention for naming wage bins. The first wage bin is "100" and includes all individuals with real hourly wages (measured in cents) in the interval [0,125).
	Bins thereafter are named by the wage floor defining the bin (e.g., 125 is the bin for wages in [125, 150)). The sole other exception is the top-coded bin "3000", which includes
	individuals with wages of $30.00 and above.
	
2) removed from the employed the handful of self-employed reporting an hourly wage...unlikely to make much difference.

3) Filled in empty wagebins and added zero obs.

4) First bin doesn't exist in Canadian data with 2002 deflator...nominal wages are bottom coded at "2".

5) Presumably the reason that the minimum wage (treatment) variables are constructed after the CPS data is merged with the minimum wage
	is to implement the 2% test...i.e. 2 percent of the workforce is affected by the minimum wage increase.
	
6) Lag of MW can't be computed for the first quarter, since there is currently nothing to attach the MW in the last quarter of 1996 to.
	





*/

clear all
set more off

* Project folders, stored as global macros
*global master_path = "C:\Users\jimt2\Dropbox\Brochu, Green, Lemieux, and Townsend\data"
global dir_path "P:\Townsend_5609_wages\Cengiz"
global data "$dir_path/data"
global temp "$data/temp"
global log "$dir_path/logfiles"

* Close any log that is still open, then start a fresh text log.
* Both commands run under capture, so a failure here does not stop the script.
capture log close
capture log using "$log/2 Wagebin employment_JT", replace text

* Temporary files used to pass intermediate datasets between steps
tempfile N_data N_data_noflagB E_data E_data_noflagB scale_data
tempfile Wagebin_data

* Parameters
local year_min 1997
local year_max 2019
local nwagebins 117
local months 2 5 8 11 // Middle months for each quarter

/* Goal of this part of the script: size of the population aged 15 and over
by province and date: N_pt */

* First step: bring the minimum wage and CPI data to quarterly frequency

use $data\min_wag_1996_2017, clear

* Quarterly date built from the year and month variables
generate date = qofd(dofm(ym(year, month)))

rename (mwage cpi) (mw cpi_base_2002)

* One record per province-quarter: the highest minimum wage observed in the
* quarter and the average CPI over the quarter
collapse (max) mw (mean) cpi_base_2002, by(date prov)

tempfile min_wage_data
save `min_wage_data'


* Load the 6-month panel sample
use "$data/panel_6M_sample_mod", clear


*Criteria and variable name changes from hazard estimator programs.
* wage1 is multiplied by 100 for survey years 2013 and later.
* NOTE(review): presumably this aligns the units of wage1 across years -- confirm.
* Records with a missing syear1 also satisfy syear1>=2013 in Stata.
replace wage1=wage1*100 if syear1>=2013

rename smth1 month
rename syear1 year

sort prov year month

* Quarterly date built from the survey year and month
gen date = qofd(dofm(ym(year, month)))

* Attach the quarterly minimum wage and CPI by province and quarter.
* Panel records without a match (_merge==1) are dropped; rows that exist only
* in the minimum wage file (_merge==2) stay in memory.
merge m:1 prov date using `min_wage_data'
drop if _merge==1
cap drop _merge

* Every remaining record must carry a CPI value. The variable is referenced by
* its full name so the check does not depend on variable abbreviation.
assert cpi_base_2002~=.

* Constant used as the counting variable in the collapses further down
gen rec_num=1

*Population without flag_restrictionsB

preserve

* With frequency weights, (count) returns the weighted number of records,
* while (rawsum) ignores the weights and returns the raw number of records.
collapse (count) rec_num (rawsum) count_N_pt_noflagB=rec_num [fw=finalwt1], by(prov date)

rename rec_num N_pt_noflagB
* Label the renamed variable by its full name; N_pt no longer exists here and
* would only resolve through variable abbreviation.
label var N_pt_noflagB "Population aged 15 and over - Unrestricted"

sum
* Save temp data
save `N_data_noflagB', replace
restore


*Population with flag_restrictionsB

preserve
drop if flag_restrictionsB==1 /*Same restriction flag as used in density estimation */

* Same weighted count / raw count pair as above, on the restricted sample
collapse (count) rec_num (rawsum) count_N_pt=rec_num [fw=finalwt1], by(prov date)

rename rec_num N_pt
label var N_pt "Population aged 15 and over"

* Save temp data
save `N_data', replace

restore





*Criteria and variable name changes from hazard estimator programs.

* From here on the sample is employed paid workers only.
* Missing values compare as larger than 2 in Stata, so they are dropped too;
* the asserts that follow confirm that none remain.
drop if lfsstat1>2
assert lfsstat1!=.
drop if cowmain1>2
assert cowmain1!=.


* Employment by province and quarter, before applying flag_restrictionsB
preserve
collapse (count) rec_num (rawsum) count_E_pt_noflagB=rec_num [fw=finalwt1], by(prov date)
rename rec_num E_pt_noflagB
label variable E_pt_noflagB "Employment, aged 15-59 - noflagB"
save `E_data_noflagB', replace
restore


* Apply the same restriction flag as used in density estimation.
* This drop happens outside preserve/restore, so it also applies to every later step.
drop if flag_restrictionsB==1


* Employment by province and quarter on the restricted sample, plus the scaling factor
preserve
collapse (count) rec_num (rawsum) count_E_pt=rec_num [fw=finalwt1], by(prov date)
rename rec_num E_pt
label variable E_pt "Employment, aged 15-59"
save `E_data', replace

* Bring the population counts and the unrestricted employment counts alongside
merge m:1 prov date using `N_data_noflagB', nogenerate
merge m:1 prov date using `N_data', nogenerate
merge 1:1 prov date using `E_data_noflagB', nogenerate

* alpha is a province/quarter-specific scaling factor that converts the
* restricted provincial/quarter employment rate into the unrestricted one
generate alpha=(E_pt_noflagB/N_pt_noflagB)*(N_pt/E_pt)
summarize alpha
save `scale_data', replace

restore



* Deflate the hourly wage with the 2002-base CPI
gen rhrlyearn=wage1/cpi_base_2002*100

* Gen wagebin variable

********************************************************************************
****************     Wage bin variable *****************************************
********************************************************************************
*This variable will be useful in collapsing
* NOTE(review): the units of wage1 are not visible in this script; confirm that
* wage is in cents, as the bin boundaries below assume.
gen wage=rhrlyearn*100
gen wagebins=0
* Bottom bin "100" holds everything below 125
replace wagebins=100 if wage<125
* Interior bins are 25 wide and named by their floor
replace wagebins=floor(wage/25)*25 if wage>=125 & wage<3000
* Top-coded bin "3000". A missing wage also satisfies wage>=3000 and is reset
* to missing on the next line.
replace wagebins=3000 if wage>=3000
replace wagebins=. if rhrlyearn==.
* Every record must have landed in a bin or been set to missing
assert wagebins!=0

drop if wagebins==. /* Removes self-employed, etc. */

* Constant used to obtain the raw record count per cell
gen cellcount=1

* (count) with frequency weights gives weighted employment per cell;
* (rawsum) ignores the weights and gives the raw number of records per cell
collapse (count) rec_num (rawsum) cellcount  [fw=finalwt1], by(prov date wagebins year)

* Create the province x quarter x wage bin cells that have no records.
* fillin leaves every other variable missing in the rows it adds, so the zeroing
* has to come after it. Both counts are set to zero in those rows.
fillin prov date wagebins
replace rec_num=0 if _fillin==1
replace cellcount=0 if _fillin==1


rename rec_num E_pjt
label var E_pjt "Wage bin employment"

* Attach the province-quarter variables to the wage bin cells. After each
* merge, rows found only in the data in memory (_merge==1) are dropped; rows
* found only in the using file are kept.

* Minimum wage and CPI
merge m:1 date prov using `min_wage_data'
drop if _merge == 1
drop _merge

* Restricted population counts
merge m:1 date prov using `N_data'
drop if _merge == 1
drop _merge

* Employment counts, unrestricted population counts and the scaling factor alpha
merge m:1 date prov using `scale_data'
drop if _merge == 1
drop _merge

* Rows without a year (e.g. the filled-in cells) take the year of their quarter
egen nyear = mean(year), by(date)
replace year = nyear if year == .
drop nyear


* Dependent variable: wage bin employment over population, rescaled by alpha
generate Y_pjt = E_pjt/N_pt*alpha
label variable Y_pjt "Wage bin employment per capita"

drop if E_pjt == . /*first two quarters of 1996 */


* Check: the sum of Y_pjt over wage bins within a province-quarter (Y_pt_calc)
* can be compared with the unrestricted employment rate (Y_pt)
egen Y_pt_calc = total(Y_pjt), by(prov date)
generate Y_pt = E_pt_noflagB/N_pt_noflagB

summarize Y*

rename date quarterdate
save "$data/Y_variable", replace


 
* Summary statistics for vetting
* The data in memory are preserved first, so the collapse below does not
* replace them permanently.
preserve

* Readable province names and quarterly date display
label define PROV 10 "Nfld." 11 "P.E.I" 12 "Nova Scotia" 13 "New Brunswick" 24 "Quebec" 35 "Ontario" 46 "Manitoba" 47 "Saskatchewan" 48 "Alberta" 59 "British Columbia"
label values prov PROV
format quarterdate %tq

* One record per province-quarter; the counts are averaged within each province-quarter
collapse (mean) count_E_pt count_N_pt count_E_pt_noflagB count_N_pt_noflagB, by(prov quarterdate year)
generate one = 1

* Overall totals of the raw counts from 1997 onwards
table one if year >= 1997, stat(sum count_E_pt) stat(sum count_N_pt) stat(sum count_E_pt_noflagB) stat(sum count_N_pt_noflagB)

* Raw counts by quarter and province from 1997 onwards; this is the table that gets exported
table quarterdate prov if year >= 1997, stat(sum count_E_pt) stat(sum count_N_pt) stat(sum count_E_pt_noflagB) stat(sum count_N_pt_noflagB)
collect preview
collect export "$dir_path\raw_counts.xlsx", open replace



log close